This notebook defines the adaptive level equalization algorithm (on a spectrogram), as defined in Towsey 2013, for the removal of background noise from a recording of the natural acoustic environment. Towsey modified the algorithm first defined in Lamel et al. 1981, which was "originally used for end-point detection in speech recordings (Towsey 2013)."
Towsey, Michael W. (2013) Noise removal from wave-forms and spectrograms derived from natural recordings of the environment.
Lamel, L. F., Rabiner, L. R., Rosenberg, A. E., & Wilpon, J. G. (1981). An improved endpoint detector for isolated word recognition. IEEE Trans. ASSP, ASSP-29, 777-785.
In [1]:
import numpy
import pandas
from numba import guvectorize, float64, int64
from nacoustik import Wave
from nacoustik.utilities import sum_decibels
from nacoustik.spectrum import psd
import matplotlib.pyplot as plt
%matplotlib inline
sound — filepath, wavescape.Wave object, or numpy array of wave signal
N — decimal value (0 - 1) to determine signal cutoff levels
denoise_iterations — number of iterations to run the denoise algorithm
In [2]:
sound = "/Users/Jake/Desktop/seewave_examples/160317-145000_24k.wav"
In [3]:
N = 0.18
In [4]:
denoise_iterations = 3
load wave file and compute the psd spectrogram
f = frequency bins
t = time steps
a = amplitude values (decibels)
In [5]:
sound = Wave(sound)
sound.read()
sound.normalize()
f, t, a = psd(sound)
1 – determine the number of histogram bins (1/8th of the total number of frequency bins in the spectrogram)
In [6]:
n_bins = numpy.round(a.shape[1] / 8).astype(numpy.int)
print("n_bins: {0}".format(n_bins))
2 – calculate histograms of intensity values for each frequency bin
In [7]:
# implemented as a universal function (ufunc) via numba.guvectorize
@guvectorize([(float64[:,:,:], int64[:], int64[:], float64[:,:,:], float64[:,:,:])],
'(c,f,t),(h),(e)->(c,f,h),(c,f,e)', nopython=True)
def calculate_histograms(a, h_bins, e_bins, hists, edges):
for channel in range(a.shape[0]):
for f_band in range(a.shape[1]):
hists[channel, f_band], edges[channel, f_band] = numpy.histogram(a[channel, f_band], h_bins[0])
# allocate arrays for histograms and edge values
shape_histograms = (a.shape[0], a.shape[1], n_bins)
shape_edges = (a.shape[0], a.shape[1], n_bins + 1)
histograms = numpy.empty(shape_histograms)
edges = numpy.empty(shape_edges)
# call 'calculate_histograms' ufunc
histograms, edges = calculate_histograms(a,
# number of bins in histograms, as an array (hack)
# necessary for numba.guvectorize function
numpy.ones(shape=(n_bins), dtype=numpy.int64) * n_bins,
# number of histogram edges, as an array (hack)
numpy.ones(shape=(n_bins + 1), dtype=numpy.int64) * (n_bins + 1),
histograms, edges)
3 — smooth the histograms with a moving average filter (window = 5)
4 — determine the modal intensity of each histogram (the bin containing the most counts)
5 — calculate the number of counts for one standard deviation (68%) of counts below the modal intensity
6 — compute a signal cutoff value (values under which are noise)
N — decimal value (0 - 1) to determine signal cutoff levels
cutoff count = N * total counts below modal intensity * 68%
cutoff value = lower edge value of the cutoff bin, found by
accumulating bin counts from the left side of the histogram
until the cutoff count is reached
helper function to determine the cutoff bin
In [8]:
def find_cutoff_index(histogram, cutoff_count):
cumsum = 0.0
for index, value in histogram.iteritems():
cumsum += value
if cumsum >= cutoff_count:
break
return index
calculate cutoff values
In [9]:
# allocate array for modal and cutoff values
cutoffs = numpy.empty(shape=(histograms.shape[0], histograms.shape[1]))
modals = numpy.empty(shape=(histograms.shape[0], histograms.shape[1]))
# loop through all histograms for each frequency band
for channel in range(a.shape[0]):
# allocate minimum modal variable
modal_minimum = 0
for f_band in range(a.shape[1]):
# convert histogram to pandas.Series (to use moving average function)
histogram = pandas.Series(histograms[channel, f_band])
# smooth
histogram = histogram.rolling(center=False, window=5).mean()
# replace NaN values generated by moving average
histogram.replace(numpy.NaN, 0, inplace=True)
# determine modal value
modal_index = histogram.idxmax()
modal = edges[channel, f_band, modal_index]
if modal > modal_minimum:
modals[channel, f_band] = modal_minimum
else:
modal_minimum = modal
modals[channel, f_band] = modal_minimum
# determine cutoff value
cutoff_count = histogram[0:modal_index].sum() * 0.68 * N
cutoffs[channel, f_band] = edges[channel, f_band, find_cutoff_index(histogram, cutoff_count)]
plot cutoff values (channel 1)
In [10]:
fig, ax = plt.subplots(1, 2)
fig.set_figwidth(15)
# cutoff values over frequency for channel 1
pl = ax[0].plot(f, cutoffs[channel], color='black')
tl = ax[0].set_title('background noise')
xl0 = ax[0].set_xlabel('frequency (herz)')
yl0 = ax[0].set_ylabel('intensity (decibels)')
# smoothed histogram for the last (highest) frequency band in channel 1
width = edges[0, -1, 1] - edges[0, -1, 0]
pr = ax[1].bar(edges[0, -1, :-1], histogram.values, width=width,
color='0.8', edgecolor='0.2')
# annotate
bars = pr.get_children()
modal_bin = bars[histogram.idxmax()]
modal_bin.set_facecolor('0.5')
modal_bin.set_hatch('/')
cutoff_index = find_cutoff_index(histogram, cutoff_count)
cutoff_bin = bars[cutoff_index]
for i in range(cutoff_index):
b = bars[i]
b.set_facecolor('white')
tr = ax[1].set_title("histogram of intensity values between {0:.0f} and {1:.0f} herz".format(f[-2], f[-1]))
xl1 = ax[1].set_xlabel('intensity (decibels)')
yl1= ax[1].set_ylabel('count')
a1 = ax[1].annotate("modal intensity", xy=(modal_bin.get_x(), 0), xytext=(-220, 600), ha='right',
arrowprops=dict(arrowstyle='->',
connectionstyle='angle3,angleA=0,angleB=-45',
relpos=(1., 0.)))
a2 = ax[1].annotate("cutoff intensity", xy=(cutoff_bin.get_x(), 0), xytext=(-225, 400), ha='right',
arrowprops=dict(arrowstyle='->',
connectionstyle='angle3,angleA=0,angleB=-45',
relpos=(1., 0.)))
1 — smooth cutoff values
In [11]:
#cutoffs_smooth = numpy.empty(shape=cutoffs.shape)
for channel in range(a.shape[0]):
smoothed = pandas.Series(cutoffs[channel]).rolling(center=False, window=5).mean()
# replace NaN values
smoothed.replace(numpy.NaN, smoothed.max(), inplace=True)
cutoffs[channel] = smoothed.values
plot smoothed cutoff values
In [12]:
fig, ax = plt.subplots(1)
fig.set_figwidth(15)
# cutoff values over frequency for channel 1
pl = ax.plot(f, cutoffs[channel], color='black')
tl = ax.set_title('background noise (smoothed cutoff values)')
xl0 = ax.set_xlabel('frequency (herz)')
yl0 = ax.set_ylabel('intensity (decibels)')
2 — subtract the cutoff values from the psd value for each frequency band
In [13]:
ale = numpy.empty_like(a)
it = numpy.nditer([cutoffs], flags=['c_index', 'multi_index'], op_flags=[['readonly']])
while not it.finished:
ale[it.multi_index[0], it.multi_index[1]] = a[it.multi_index[0], it.multi_index[1]] - it.value
it.iternext()
set all negative values to zero
In [14]:
ale = numpy.select(condlist=[ale > 0], choicelist=[ale], default=0)
"We implement an additional noise removal step to compensate for setting N to a small value in step A.6. This technique better preserves the structural integrity of complex acoustic events (e.g. bird calls) but removes noise from background locations further removed from that event. (Towsey 2013)"
1 — iterate over the all values in the spectrogram and compute the average value of the neighborhood (3 time steps x 9 frequency bins)
2 — if the average is below a specified threshold (10), set the current value to the minimum of the neighborhood
refer to the profiling results of the denoise algorithm
In [15]:
# implemented as a universal function via numba.guvectorize
@guvectorize([(float64[:,:,:], float64[:,:,:])], '(c,f,t)->(c,f,t)', nopython=True)
def denoise(a, b):
for channel in range(2):
for f_band in range(4, a.shape[1] - 4):
for t_step in range(1, a.shape[2] - 1):
neighborhood = a[channel, f_band - 4:f_band + 5, t_step - 1:t_step + 2]
if neighborhood.mean() < 10:
b[channel, f_band, t_step] = neighborhood.min()
else:
b[channel, f_band, t_step] = neighborhood[4, 1]
In [16]:
for i in range(denoise_iterations):
ale = denoise(ale, numpy.zeros_like(ale))
plot result (channel 1)
In [17]:
# configure figure
dpi = 192
fig = plt.figure(figsize=((920 / dpi) * 3, (460 / dpi) * 3), dpi=dpi)
plt.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)
fig.set_frameon(False)
# specify frequency bins (width of 1 kiloherz)
bins = numpy.arange(0, (sound.rate / 2), 1000)
# original
ax_spec_1 = plt.subplot(211)
spec_1 = ax_spec_1.pcolormesh(t, f, a[0], cmap='viridis')
ax_spec_1.set(ylim=([0, sound.rate / 2]),
#xticks = numpy.arange(30, sound.duration, 30).astype(numpy.int),
yticks = bins.astype(numpy.int) + 1000)
ax_spec_1.tick_params(length=12, color='white',
bottom=True, labelbottom=False,
top=False, labeltop=False,
labelleft=False,
labelright=False)
ax_spec_1.set_frame_on(False)
# after adaptive level equalization
ax_spec_2 = plt.subplot(212)
spec_2 = ax_spec_2.pcolormesh(t, f, ale[0], cmap='viridis')
ax_spec_2.set(ylim=([0, sound.rate / 2]),
#xticks = numpy.arange(30, sound.duration, 30).astype(numpy.int),
yticks = bins.astype(numpy.int) + 1000)
ax_spec_2.tick_params(length=12, color='white',
bottom=True, labelbottom=False,
top=False, labeltop=False,
labelleft=False,
labelright=False)
ax_spec_2.set_frame_on(False)
# annotation
t1 = ax_spec_1.text(sound.duration/2, sound.rate/2, 'original', color='white',
ha='center', va='top', size=15)
t2 = ax_spec_2.text(sound.duration/2, sound.rate/2,
"after adaptive level equalization", color='white',
ha='center', va='top', size=15)
In [18]:
def subtract_decibels(a, b):
c = (10**(a / 10)) - (10**(b / 10))
c[c <= 0] = 1
return 10 * numpy.log10(c)
In [19]:
mask = numpy.ma.masked_not_equal(ale, value=0).mask
a_mask = a * mask
it = numpy.nditer([modals], flags=['c_index', 'multi_index'], op_flags=[['readonly']])
while not it.finished:
a_mask[it.multi_index[0], it.multi_index[1]] = subtract_decibels(a_mask[it.multi_index[0], it.multi_index[1]], it.value)
it.iternext()
ale = a_mask * mask
In [25]:
# configure figure
dpi = 192
fig = plt.figure(figsize=((920 / dpi) * 3, (460 / dpi) * 3), dpi=dpi)
plt.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)
fig.set_frameon(False)
# specify frequency bins (width of 1 kiloherz)
bins = numpy.arange(0, (sound.rate / 2), 1000)
# original
ax_spec_1 = plt.subplot(211)
spec_1 = ax_spec_1.pcolormesh(t, f, a[0], cmap='viridis')
ax_spec_1.set(ylim=([0, sound.rate / 2]),
#xticks = numpy.arange(30, sound.duration, 30).astype(numpy.int),
yticks = bins.astype(numpy.int) + 1000)
ax_spec_1.tick_params(length=12, color='white',
bottom=True, labelbottom=False,
top=False, labeltop=False,
labelleft=False,
labelright=False)
ax_spec_1.set_frame_on(False)
# after adaptive level equalization
ax_spec_2 = plt.subplot(212)
spec_2 = ax_spec_2.pcolormesh(t, f, numpy.ma.masked_equal(ale[0], 0), cmap='viridis', )
ax_spec_2.set(ylim=([0, sound.rate / 2]),
#xticks = numpy.arange(30, sound.duration, 30).astype(numpy.int),
yticks = bins.astype(numpy.int) + 1000)
ax_spec_2.tick_params(length=12, color='white',
bottom=True, labelbottom=False,
top=False, labeltop=False,
labelleft=False,
labelright=False)
ax_spec_2.set_frame_on(False)
# annotation
t1 = ax_spec_1.text(sound.duration/2, sound.rate/2, 'original', color='white',
ha='center', va='top', size=15)
t2 = ax_spec_2.text(sound.duration/2, sound.rate/2,
"after adaptive level equalization", color='black',
ha='center', va='top', size=15)